gusucode.com > 《MATLAB图像与视频处理实用案例详解》代码 > 《MATLAB图像与视频处理实用案例详解》代码/第 19 章 基于语音识别的信号灯图像模拟控制技术/voicebox/melbankm.m
function [x,mc,mn,mx]=melbankm(p,n,fs,fl,fh,w) %MELBANKM determine matrix for a mel/erb/bark-spaced filterbank [X,MN,MX]=(P,N,FS,FL,FH,W) % % Inputs: % p number of filters in filterbank or the filter spacing in k-mel/bark/erb [ceil(4.6*log10(fs))] % n length of fft % fs sample rate in Hz % fl low end of the lowest filter as a fraction of fs [default = 0] % fh high end of highest filter as a fraction of fs [default = 0.5] % w any sensible combination of the following: % 'b' = bark scale instead of mel % 'e' = erb-rate scale % 'l' = log10 Hz frequency scale % 'f' = linear frequency scale % % 'c' = fl/fh specify centre of low and high filters % 'h' = fl/fh are in Hz instead of fractions of fs % 'H' = fl/fh are in mel/erb/bark/log10 % % 't' = triangular shaped filters in mel/erb/bark domain (default) % 'n' = hanning shaped filters in mel/erb/bark domain % 'm' = hamming shaped filters in mel/erb/bark domain % % 'z' = highest and lowest filters taper down to zero [default] % 'y' = lowest filter remains at 1 down to 0 frequency and % highest filter remains at 1 up to nyquist freqency % % 'u' = scale filters to sum to unity % % 's' = single-sided: do not double filters to account for negative frequencies % % 'g' = plot idealized filters [default if no output arguments present] % % Note that the filter shape (triangular, hamming etc) is defined in the mel (or erb etc) domain. % Some people instead define an asymmetric triangular filter in the frequency domain. % % If 'ty' or 'ny' is specified, the total power in the fft is preserved. % % Outputs: x a sparse matrix containing the filterbank amplitudes % If the mn and mx outputs are given then size(x)=[p,mx-mn+1] % otherwise size(x)=[p,1+floor(n/2)] % Note that the peak filter values equal 2 to account for the power % in the negative FFT frequencies. % mc the filterbank centre frequencies in mel/erb/bark % mn the lowest fft bin with a non-zero coefficient % mx the highest fft bin with a non-zero coefficient % Note: you must specify both or neither of mn and mx. % % Examples of use: % % (a) Calcuate the Mel-frequency Cepstral Coefficients % % f=rfft(s); % rfft() returns only 1+floor(n/2) coefficients % x=melbankm(p,n,fs); % n is the fft length, p is the number of filters wanted % z=log(x*abs(f).^2); % multiply x by the power spectrum % c=dct(z); % take the DCT % % (b) Calcuate the Mel-frequency Cepstral Coefficients efficiently % % f=fft(s); % n is the fft length, p is the number of filters wanted % [x,mc,na,nb]=melbankm(p,n,fs); % na:nb gives the fft bins that are needed % z=log(x*(f(na:nb)).*conj(f(na:nb))); % % (c) Plot the calculated filterbanks % % plot((0:floor(n/2))*fs/n,melbankm(p,n,fs)') % fs=sample frequency % % (d) Plot the idealized filterbanks (without output sampling) % % melbankm(p,n,fs); % % References: % % [1] S. S. Stevens, J. Volkman, and E. B. Newman. A scale for the measurement % of the psychological magnitude of pitch. J. Acoust Soc Amer, 8: 185?9, 1937. % [2] S. Davis and P. Mermelstein. Comparison of parametric representations for % monosyllabic word recognition in continuously spoken sentences. % IEEE Trans Acoustics Speech and Signal Processing, 28 (4): 357?66, Aug. 1980. % Copyright (C) Mike Brookes 1997-2009 % Version: $Id: melbankm.m,v 1.11 2010/01/02 20:02:22 dmb Exp $ % % VOICEBOX is a MATLAB toolbox for speech processing. % Home page: http://www.ee.ic.ac.uk/hp/staff/dmb/voicebox/voicebox.html % %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% % This program is free software; you can redistribute it and/or modify % it under the terms of the GNU General Public License as published by % the Free Software Foundation; either version 2 of the License, or % (at your option) any later version. % % This program is distributed in the hope that it will be useful, % but WITHOUT ANY WARRANTY; without even the implied warranty of % MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the % GNU General Public License for more details. % % You can obtain a copy of the GNU General Public License from % http://www.gnu.org/copyleft/gpl.html or by writing to % Free Software Foundation, Inc.,675 Mass Ave, Cambridge, MA 02139, USA. %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% % Note "FFT bin_0" assumes DC = bin 0 whereas "FFT bin_1" means DC = bin 1 if nargin < 6 w='tz'; % default options if nargin < 5 fh=0.5; % max freq is the nyquist if nargin < 4 fl=0; % min freq is DC end end end sfact=2-any(w=='s'); % 1 if single sided else 2 wr=' '; % default warping is mel for i=1:length(w) if any(w(i)=='lebf'); wr=w(i); end end if any(w=='h') || any(w=='H') mflh=[fl fh]; else mflh=[fl fh]*fs; end if ~any(w=='H') switch wr case 'f' % no transformation case 'l' if fl<=0 error('Low frequency limit must be >0 for l option'); end mflh=log10(mflh); % convert frequency limits into log10 Hz case 'e' mflh=frq2erb(mflh); % convert frequency limits into erb-rate case 'b' mflh=frq2bark(mflh); % convert frequency limits into bark otherwise mflh=frq2mel(mflh); % convert frequency limits into mel end end melrng=mflh*(-1:2:1)'; % mel range fn2=floor(n/2); % bin index of highest positive frequency (Nyquist if n is even) if isempty(p) p=ceil(4.6*log10(fs)); % default number of filters end if any(w=='c') % c option: specify fiter centres not edges if p<1 p=round(melrng/(p*1000))+1; end melinc=melrng/(p-1); mflh=mflh+(-1:2:1)*melinc; else if p<1 p=round(melrng/(p*1000))-1; end melinc=melrng/(p+1); end % % Calculate the FFT bins corresponding to [filter#1-low filter#1-mid filter#p-mid filter#p-high] % switch wr case 'f' blim=(mflh(1)+[0 1 p p+1]*melinc)*n/fs; case 'l' blim=10.^(mflh(1)+[0 1 p p+1]*melinc)*n/fs; case 'e' blim=erb2frq(mflh(1)+[0 1 p p+1]*melinc)*n/fs; case 'b' blim=bark2frq(mflh(1)+[0 1 p p+1]*melinc)*n/fs; otherwise blim=mel2frq(mflh(1)+[0 1 p p+1]*melinc)*n/fs; end mc=mflh(1)+(1:p)*melinc; % mel centre frequencies b1=floor(blim(1))+1; % lowest FFT bin_0 required might be negative) b4=min(fn2,ceil(blim(4))-1); % highest FFT bin_0 required % % now map all the useful FFT bins_0 to filter1 centres % switch wr case 'f' pf=((b1:b4)*fs/n-mflh(1))/melinc; case 'l' pf=(log10((b1:b4)*fs/n)-mflh(1))/melinc; case 'e' pf=(frq2erb((b1:b4)*fs/n)-mflh(1))/melinc; case 'b' pf=(frq2bark((b1:b4)*fs/n)-mflh(1))/melinc; otherwise pf=(frq2mel((b1:b4)*fs/n)-mflh(1))/melinc; end % % remove any incorrect entries in pf due to rounding errors % if pf(1)<0 pf(1)=[]; b1=b1+1; end if pf(end)>=p+1 pf(end)=[]; b4=b4-1; end fp=floor(pf); % FFT bin_0 i contributes to filters_1 fp(1+i-b1)+[0 1] pm=pf-fp; % multiplier for upper filter k2=find(fp>0,1); % FFT bin_1 k2+b1 is the first to contribute to both upper and lower filters k3=find(fp<p,1,'last'); % FFT bin_1 k3+b1 is the last to contribute to both upper and lower filters k4=numel(fp); % FFT bin_1 k4+b1 is the last to contribute to any filters if isempty(k2) k2=k4+1; end if isempty(k3) k3=0; end if any(w=='y') % preserve power in FFT mn=1; % lowest fft bin required (1 = DC) mx=fn2+1; % highest fft bin required (1 = DC) r=[ones(1,k2+b1-1) 1+fp(k2:k3) fp(k2:k3) repmat(p,1,fn2-k3-b1+1)]; % filter number_1 c=[1:k2+b1-1 k2+b1:k3+b1 k2+b1:k3+b1 k3+b1+1:fn2+1]; % FFT bin1 v=[ones(1,k2+b1-1) pm(k2:k3) 1-pm(k2:k3) ones(1,fn2-k3-b1+1)]; else r=[1+fp(1:k3) fp(k2:k4)]; % filter number_1 c=[1:k3 k2:k4]; % FFT bin_1 - b1 v=[pm(1:k3) 1-pm(k2:k4)]; mn=b1+1; % lowest fft bin_1 mx=b4+1; % highest fft bin_1 end if b1<0 c=abs(c+b1-1)-b1+1; % convert negative frequencies into positive end % end if any(w=='n') v=0.5-0.5*cos(v*pi); % convert triangles to Hanning elseif any(w=='m') v=0.5-0.46/1.08*cos(v*pi); % convert triangles to Hamming end if sfact==2 % double all except the DC and Nyquist (if any) terms msk=(c+mn>2) & (c+mn<n-fn2+2); % there is no Nyquist term if n is odd v(msk)=2*v(msk); end % % sort out the output argument options % if nargout > 2 x=sparse(r,c,v); if nargout == 3 % if exactly three output arguments, then mc=mn; % delete mc output for legacy code compatibility mn=mx; end else x=sparse(r,c+mn-1,v,p,1+fn2); end if any(w=='u') sx=sum(x,2); x=x./repmat(sx+(sx==0),1,size(x,2)); end % % plot results if no output arguments or g option given % if ~nargout || any(w=='g') % plot idealized filters ng=201; % 201 points me=mflh(1)+(0:p+1)'*melinc; switch wr case 'f' fe=me; % defining frequencies xg=repmat(linspace(0,1,ng),p,1).*repmat(me(3:end)-me(1:end-2),1,ng)+repmat(me(1:end-2),1,ng); case 'l' fe=10.^me; % defining frequencies xg=10.^(repmat(linspace(0,1,ng),p,1).*repmat(me(3:end)-me(1:end-2),1,ng)+repmat(me(1:end-2),1,ng)); case 'e' fe=erb2frq(me); % defining frequencies xg=erb2frq(repmat(linspace(0,1,ng),p,1).*repmat(me(3:end)-me(1:end-2),1,ng)+repmat(me(1:end-2),1,ng)); case 'b' fe=bark2frq(me); % defining frequencies xg=bark2frq(repmat(linspace(0,1,ng),p,1).*repmat(me(3:end)-me(1:end-2),1,ng)+repmat(me(1:end-2),1,ng)); otherwise fe=mel2frq(me); % defining frequencies xg=mel2frq(repmat(linspace(0,1,ng),p,1).*repmat(me(3:end)-me(1:end-2),1,ng)+repmat(me(1:end-2),1,ng)); end v=1-abs(linspace(-1,1,ng)); if any(w=='n') v=0.5-0.5*cos(v*pi); % convert triangles to Hanning elseif any(w=='m') v=0.5-0.46/1.08*cos(v*pi); % convert triangles to Hamming end v=v*sfact; % multiply by 2 if double sided v=repmat(v,p,1); if any(w=='y') % extend first and last filters v(1,xg(1,:)<fe(2))=sfact; v(end,xg(end,:)>fe(p+1))=sfact; end if any(w=='u') % scale to unity sum dx=(xg(:,3:end)-xg(:,1:end-2))/2; dx=dx(:,[1 1:ng-2 ng-2]); vs=sum(v.*dx,2); v=v./repmat(vs+(vs==0),1,ng)*fs/n; end plot(xg',v','b'); set(gca,'xlim',[fe(1) fe(end)]); xlabel(['Frequency (' xticksi 'Hz)']); end